#functions that we need
#install.packages('bnlearn')
library(bnlearn)
# define a function that takes a dataframe of returns, a type of test and a fixed variable
# it returns a dataframe with p-values and correlations for every pair of stocks given the fixed variable
ci_test_pairs <- function(df, test, fixed) {
  # get the names of the columns that are not the fixed variable
  stocks <- names(df)[names(df) != fixed]
  # create an empty dataframe to store the results
  results <- data.frame()
  # loop over all pairs of stocks
  for (i in 1:(length(stocks) - 1)) {
    for (j in (i + 1):length(stocks)) {
      # get the names of the pair
      x <- stocks[i]
      y <- stocks[j]
      # perform the conditional independence test given the fixed variable
      test_result <- ci.test(x, y, fixed, data = df, test = test)
      # extract the p-value and the correlation from the test result
      p_value <- test_result$p.value
      correlation <- cor(df[[x]], df[[y]])
      # append a row to the results dataframe with the pair names, p-value and correlation
      results <- rbind(results, data.frame(x = x, y = y, p_value = p_value, correlation = correlation))
    }
  }
  # return the results dataframe
  return(results)
}

correlation_threshold <- function(cor_matrix, threshold) {
  
  # Get the lower triangle of the correlation matrix
  cor_lower <- cor_matrix[lower.tri(cor_matrix)]
  
  # Get the indices of the lower triangle of the correlation matrix
  cor_indices <- which(lower.tri(cor_matrix), arr.ind = TRUE)
  
  # Create a data frame with the pairs of variables and their correlations
  df <- data.frame(x = rownames(cor_matrix)[cor_indices[, 1]],
                   y = colnames(cor_matrix)[cor_indices[, 2]],
                   edge = ifelse(abs(cor_lower) >= threshold, 1, 0))
  
  return(df)
}

p_value_eval<- function(x,thresh){
  if(x<thresh){
    ret=1
  }
  else{
    ret=0
    
  }
  return(ret)
}


grapher<-function(example1,compl){
library(dplyr) 
library(igraph)
#filtered only the edges that contain 1
edges1 <- filter(example1, edge == "1")
edges1
#plotted the graph 

g <- graph_from_data_frame(edges1, directed = FALSE)

if(compl==TRUE){
  g<- complementer(g)
}

plot(g, vertex.label = V(g)$names, edge.label = E(g)$weight)
}
#reading the data and getting rid of HSI

df<- read.csv("/users/amirreza/Desktop/returns_data.csv")

df<- df[,-12]


#different slices corresponding to federal reserve interest rates hiking/cutting/pausing

df1<- df[ df$Date> "2001-11-01"  &   df$Date < "2001-12-31",-1 ]
 
df2<- df[ df$Date> "2002-01-01"  &   df$Date < "2002-10-31",-1 ]

df3<- df[ df$Date> "2002-11-01"  &   df$Date < "2004-06-31",-1 ]

df4<- df[ df$Date> "2004-07-01"  &   df$Date < "2006-08-31",-1 ]

df5<- df[ df$Date> "2006-08-31"  &   df$Date < "2007-08-01",-1 ]

df6<- df[ df$Date> "2007-08-01"  &   df$Date < "2008-05-01",-1 ]

df7<- df[ df$Date> "2008-05-01"  &   df$Date < "2008-09-31",-1 ]

df8<- df[ df$Date> "2008-09-31"  &   df$Date < "2009-01-01",-1 ]

df9<- df[ df$Date> "2009-01-01"  &   df$Date < "2015-12-31",-1 ]

df10<- df[ df$Date> "2016-01-01"  &   df$Date < "2019-07-31",-1 ]

df11<- df[ df$Date> "2019-07-31"  &   df$Date < "2020-05-01",-1 ]

df12<- df[ df$Date> "2020-05-01"  &   df$Date < "2022-02-01",-1 ]

dff = list(df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,df11,df12)

results=list()

for (i in 1:12){
  res<-ci_test_pairs(df = dff[[i]],fixed="DXY")
  results[[i]]<- res
  
}

#the result of the test is available as a list, each element corresponds to a time-slice 
df<- read.csv("/users/amirreza/Desktop/returns_data_classified.csv")

df<- df[,-12]


#different slices corresponding to federal reserve interest rates hiking/cutting/pausing

df1<- df[ df$Date> "2000-11-01"  &   df$Date < "2001-12-31",-1 ]
 
df2<- df[ df$Date> "2002-01-01"  &   df$Date < "2002-10-31",-1 ]

df3<- df[ df$Date> "2002-11-01"  &   df$Date < "2004-06-31",-1 ]

df4<- df[ df$Date> "2004-07-01"  &   df$Date < "2006-08-31",-1 ]

df5<- df[ df$Date> "2006-08-31"  &   df$Date < "2007-08-01",-1 ]

df6<- df[ df$Date> "2007-08-01"  &   df$Date < "2008-05-01",-1 ]

df7<- df[ df$Date> "2008-05-01"  &   df$Date < "2008-09-31",-1 ]

df8<- df[ df$Date> "2008-09-31"  &   df$Date < "2009-01-01",-1 ]

df9<- df[ df$Date> "2009-01-01"  &   df$Date < "2015-12-31",-1 ]

df10<- df[ df$Date> "2016-01-01"  &   df$Date < "2019-07-31",-1 ]

df11<- df[ df$Date> "2019-07-31"  &   df$Date < "2020-05-01",-1 ]

df12<- df[ df$Date> "2020-05-01"  &   df$Date < "2022-02-01",-1 ]

dff_count = list(df1,df2,df3,df4,df5,df6,df7,df8,df9,df10,df11,df12)

results_count=list()

for (i in 1:12){
  res_count<-ci_test_pairs(df = dff_count[[i]],fixed="DXY")
  results_count[[i]]<- res_count
  
}
#plotting the graphs for inverse of correlation matrix
#inver<- list()
#for(i in 1:12){
 # inver[[i]]<- solve(cor(dff[[i]]))
#}
   set.seed(123)

for(i in 1:12){

   grapher(correlation_threshold(cor(dff[[i]]),threshold = 0.08),compl = TRUE)
   title(paste("Marginal correlation graphs for the ", i, "th slice"))

}
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:bnlearn':
## 
##     as.igraph, compare, degree, subgraph
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union

#plotting the graph for conditional independence test on the continuous returns

graphss <- list()
for(i in 1:12){

  l<- lapply(results[[i]]$p_value, p_value_eval, thresh=0.0001)
  results[[i]]$edge<- as.integer(l)
  graphss[[i]] <- results[[i]][c("x","y","edge")]
  grapher(graphss[[i]],compl = TRUE)
  title(paste("CI test of continous returns for the " ,i, "th slice, thresh=0.0001"))
}

#plotting the graph for the conditional independence test on the count(1,0,-1) data

for(i in 1:12){
  set.seed(123)
  l<- lapply(results_count[[i]]$p_value, p_value_eval, thresh=0.0001)
  results_count[[i]]$edge<- as.integer(l)
  graphss[[i]] <- results_count[[i]][c("x","y","edge")]
  grapher(graphss[[i]],compl = TRUE)
  title(paste("CI test of counts of returns for the " ,i, "th slice, thresh=0.0001"))

  }

graphss <- list()
for(i in 1:12){

  l<- lapply(results[[i]]$p_value, p_value_eval, thresh=0.0001)
  results[[i]]$edge<- as.integer(l)
  graphss[[i]] <- results[[i]][c("x","y","edge")]
  grapher(graphss[[i]],compl = TRUE)
  title(paste("CI test of continous returns for the " ,i, "th slice, thresh=0.0001"))
}

#plotting the graph for the conditional independence test on the count(1,0,-1) data

for(i in 1:12){
  set.seed(123)
  l<- lapply(results_count[[i]]$p_value, p_value_eval, thresh=0.0001)
  results_count[[i]]$edge<- as.integer(l)
  graphss[[i]] <- results_count[[i]][c("x","y","edge")]
  grapher(graphss[[i]],compl = TRUE)
  title(paste("CI test of counts of returns for the " ,i, "th slice, thresh=0.0001"))

  }